import re

import scrapy


class WhatbuySpider(scrapy.Spider):
    """Scrape deal cards from the mobile smzdm.com "youhui" listing pages.

    Each card is yielded as a dict with the keys 商品名称, 商品价格, 是否用劵,
    是否包邮, 商品来源 and 日期. Pages are followed through ``/youhui/p=<n>``
    up to ``max_page``.

    ``fallback_date`` and ``max_page`` are plain class attributes, so they can
    be overridden per run with spider arguments, e.g.
    ``scrapy crawl whatbuy -a fallback_date=01-05 -a max_page=3``.
    """

    name = 'whatbuy'
    allowed_domains = ['smzdm.com']
    start_urls = ['https://m.smzdm.com/youhui']

    # Date used when a card's time text contains no "-" (i.e. no month-day
    # part) or is missing altogether.
    fallback_date = "12-11"
    # Last page number that will be requested.
    max_page = 5

    # Mobile request headers; sent with the first request and with every
    # pagination request so all pages are fetched the same way.
    request_headers = {
        "Host": "m.smzdm.com",
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
    }

    # Overridden so that the initial requests carry the custom headers.
    def start_requests(self):
        """Yield one request per start URL, with the mobile headers attached."""
        for url in self.start_urls:
            yield scrapy.Request(url=url, callback=self.parse, headers=self.request_headers)

    @staticmethod
    def _parse_price(raw_price):
        """Split a raw price string into (price, coupon note, shipping flag).

        * The coupon note is the text inside the first ``(...)`` pair, or
          "不用劵" when there are no parentheses.
        * The shipping flag is "包邮" when that word occurs anywhere in the
          raw text, otherwise "不包邮".
        * The price is the text before "包邮" minus its last character, or,
          without "包邮" and without parentheses, the raw text minus its last
          character. With parentheses but no "包邮" the raw text is returned
          unchanged.
        """
        coupon = "不用劵"
        shipping = "不包邮"
        free_shipping = "包邮" in raw_price
        if free_shipping:
            shipping = "包邮"

        note_match = re.search(r"(.*)\((.*?)\)", raw_price)
        if note_match:
            coupon = note_match.group(2)
            if free_shipping:
                # Work on the text in front of the parentheses so the note
                # itself never leaks into the price.
                price = note_match.group(1).split("包邮")[0][:-1]
            else:
                price = raw_price
        elif free_shipping:
            price = raw_price.split("包邮")[0][:-1]
        else:
            price = raw_price[:-1]
        return price, coupon, shipping

    def parse(self, response):
        """Yield one item per deal card, then a request for the next page."""
        # Extract the page number from the URL; the first page
        # (https://m.smzdm.com/youhui) carries none and counts as page 1.
        page_match = re.match(r"https://m\.smzdm\.com/youhui/p=(\d+)", response.url)
        page_num = int(page_match.group(1)) if page_match else 1

        for card in response.xpath('//div[@class="main"]//li'):
            title = card.xpath('.//div[@class="card-title"]/text()').extract_first()
            if title is None:
                # An <li> without a title node cannot produce a usable item;
                # skip it instead of failing the whole page.
                continue
            raw_price = card.xpath('.//div[@class="card-price"]/text()').extract_first() or ""
            price, is_juan, is_bao_you = self._parse_price(raw_price.strip())

            place = card.xpath('.//span[@class="card-mall"]/text()').extract_first()
            posted = card.xpath('.//div[@class="card-actions-left"]/span/span[2]/text()').extract_first()
            if not posted or "-" not in posted:
                posted = self.fallback_date

            item = {
                "商品名称": title.strip(),
                "商品价格": price,
                "是否用劵": is_juan,
                "是否包邮": is_bao_you,
                "商品来源": place,
                "日期": posted
            }
            print(item)
            yield item

        next_page = page_num + 1
        # int() because values supplied through ``-a`` arrive as strings.
        if next_page <= int(self.max_page):
            next_page_url = "https://m.smzdm.com/youhui/p=%d" % next_page
            yield scrapy.Request(url=next_page_url, callback=self.parse, headers=self.request_headers)